package chapter03

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

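// Sums each employee's salary across two half-year CSV files and
// prints the three highest total earners.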
object Test22_getMax {
  def main(args: Array[String]): Unit = {
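    // Silence Spark's verbose INFO logging; keep warnings and above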
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
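    // Run locally, using all available CPU cores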
    val conf = new SparkConf().setMaster("local[*]").setAppName("getMax")
    val sc = new SparkContext(conf)
    // Read the salary files for the two halves of the year
    val firstHalf = sc.textFile("input/Employee_salary_first_half.csv")
    val secondHalf = sc.textFile("input/Employee_salary_second_half.csv")
    // Drop each file's header row: it is the first line of partition 0
    val firstHalfRows = firstHalf.mapPartitionsWithIndex((index, iter) =>
      if (index == 0) iter.drop(1) else iter
    )
    val secondHalfRows = secondHalf.mapPartitionsWithIndex((index, iter) =>
      if (index == 0) iter.drop(1) else iter
    )
    // Merge the two halves into a single dataset
    val allRows = firstHalfRows.union(secondHalfRows)
    // Extract (name, salary) pairs, sum each employee's salary across
    // both halves, and keep the three highest total earners
    val top3 = allRows.map(_.split(","))
      .map(fields => (fields(1), fields(5).toInt)) // 0-based column 1 = name, column 5 = salary
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      .take(3)
    println(top3.toList)

    // Release cluster resources
    sc.stop()
  }
}
